package com.novel.crawl.bxwx9.conf;

import com.novel.crawl.bxwx9.spider.BatterSpider;
import com.novel.crawl.bxwx9.spider.downloader.CustomHttpClientDownloader;
import com.novel.crawl.bxwx9.spider.pipeline.BookPipeline;
import com.novel.crawl.bxwx9.spider.pipeline.NovelPipeline;
import com.novel.crawl.bxwx9.spider.processor.BookProcessor;
import com.novel.crawl.bxwx9.spider.processor.NovelProcessor;
import com.novel.crawl.common.service.BookService;
import com.novel.crawl.common.service.ChapterService;
import com.novel.crawl.common.service.ContentService;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.concurrent.CustomizableThreadFactory;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.QueueScheduler;

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

/**
 * Spring configuration that wires up the crawler components.
 *
 * <p>Two independent crawlers are declared: the default one ({@code spider}) and a
 * book crawler ({@code bookSpider}). Each gets its own page processor, scheduler,
 * downloader, pipeline and thread pool. Because several beans of the same type are
 * declared here, the parameter names of the {@code @Bean} methods deliberately match
 * the names of the beans they are meant to receive; keep them in sync when renaming.
 *
 * @author 奔波儿灞
 * @since 1.0
 */
@Configuration
@EnableConfigurationProperties({SpiderProperties.class, BookSpiderProperties.class})
public class SpiderConfiguration {

    /**
     * Page processor used by the default crawler.
     *
     * @param bookService    service handed to the {@link NovelProcessor}
     * @param chapterService service handed to the {@link NovelProcessor}
     * @param contentService service handed to the {@link NovelProcessor}
     * @return a new {@link NovelProcessor}
     */
    @Bean
    public PageProcessor processor(BookService bookService,
                                   ChapterService chapterService,
                                   ContentService contentService) {
        return new NovelProcessor(bookService, chapterService, contentService);
    }

    /**
     * Page processor used by the book crawler.
     *
     * @return a new {@link BookProcessor}
     */
    @Bean
    public PageProcessor bookProcessor() {
        return new BookProcessor();
    }

    /**
     * URL scheduler for the default crawler.
     *
     * @return a new {@link QueueScheduler}
     */
    @Bean
    public QueueScheduler scheduler() {
        return new QueueScheduler();
    }

    /**
     * URL scheduler for the book crawler, kept separate from {@link #scheduler()}.
     *
     * @return a new {@link QueueScheduler}
     */
    @Bean
    public QueueScheduler bookScheduler() {
        return new QueueScheduler();
    }

    /**
     * Downloader for the default crawler.
     *
     * <p>NOTE(review): the original comment says this custom downloader adds a
     * status-code check; that logic lives in {@link CustomHttpClientDownloader}.
     *
     * @return a new {@link CustomHttpClientDownloader}
     */
    @Bean
    public Downloader downloader() {
        return new CustomHttpClientDownloader();
    }

    /**
     * Downloader for the book crawler.
     *
     * <p>NOTE(review): the original comment says this custom downloader adds a
     * status-code check; that logic lives in {@link CustomHttpClientDownloader}.
     *
     * @return a new {@link CustomHttpClientDownloader}
     */
    @Bean
    public Downloader bookDownloader() {
        return new CustomHttpClientDownloader();
    }

    /**
     * Result pipeline for the default crawler.
     *
     * @return a new {@link NovelPipeline}
     */
    @Bean
    public Pipeline pipeline() {
        return new NovelPipeline();
    }

    /**
     * Result pipeline for the book crawler.
     *
     * @param bookService service handed to the {@link BookPipeline}
     * @return a new {@link BookPipeline}
     */
    @Bean
    public Pipeline bookPipeline(BookService bookService) {
        return new BookPipeline(bookService);
    }

    /**
     * Thread pool for the default crawler.
     *
     * <p>The work queue capacity equals the pool size, so only a small backlog can
     * accumulate; see {@link #newCrawlerPool(int, long, String)} for the exact setup.
     *
     * @param properties crawler settings (pool size, keep-alive, thread name prefix)
     * @return the configured {@link ThreadPoolExecutor}
     */
    @Bean
    public ExecutorService spiderExecutor(SpiderProperties properties) {
        return newCrawlerPool(properties.getPoolSize(),
                properties.getKeepAliveSeconds(),
                properties.getThreadNamePrefix());
    }

    /**
     * Thread pool for the book crawler.
     *
     * <p>The work queue capacity equals the pool size, so only a small backlog can
     * accumulate; see {@link #newCrawlerPool(int, long, String)} for the exact setup.
     *
     * @param properties book crawler settings (pool size, keep-alive, thread name prefix)
     * @return the configured {@link ThreadPoolExecutor}
     */
    @Bean
    public ExecutorService bookSpiderExecutor(BookSpiderProperties properties) {
        return newCrawlerPool(properties.getPoolSize(),
                properties.getKeepAliveSeconds(),
                properties.getThreadNamePrefix());
    }

    /**
     * Assembles the default crawler.
     *
     * @param processor      page processor
     * @param scheduler      URL scheduler
     * @param pipeline       result pipeline
     * @param downloader     page downloader
     * @param properties     crawler settings; only the pool size is read here
     * @param spiderExecutor thread pool the crawler runs on
     * @return the configured {@link Spider}
     */
    @Bean
    public Spider spider(PageProcessor processor, QueueScheduler scheduler, Pipeline pipeline, Downloader downloader,
                         SpiderProperties properties, ExecutorService spiderExecutor) {
        return assembleSpider(processor, scheduler, downloader, pipeline,
                spiderExecutor, properties.getPoolSize());
    }


    /**
     * Assembles the book crawler.
     *
     * @param bookProcessor      page processor
     * @param bookScheduler      URL scheduler
     * @param bookPipeline       result pipeline
     * @param bookDownloader     page downloader
     * @param properties         book crawler settings; only the pool size is read here
     * @param bookSpiderExecutor thread pool the crawler runs on
     * @return the configured {@link Spider}
     */
    @Bean
    public Spider bookSpider(PageProcessor bookProcessor, QueueScheduler bookScheduler,
                             Pipeline bookPipeline, Downloader bookDownloader,
                             BookSpiderProperties properties, ExecutorService bookSpiderExecutor) {
        return assembleSpider(bookProcessor, bookScheduler, bookDownloader, bookPipeline,
                bookSpiderExecutor, properties.getPoolSize());
    }

    /**
     * Builds a fixed-size pool whose bounded queue has the same capacity as the pool.
     *
     * <p>Core and maximum size are both {@code poolSize}. Rejected tasks are handled
     * by {@link ThreadPoolExecutor.CallerRunsPolicy}.
     *
     * @param poolSize         number of threads and capacity of the work queue
     * @param keepAliveSeconds keep-alive time passed to the executor, in seconds
     * @param threadNamePrefix prefix given to the thread factory
     * @return the configured {@link ThreadPoolExecutor}
     */
    private static ExecutorService newCrawlerPool(int poolSize, long keepAliveSeconds, String threadNamePrefix) {
        BlockingQueue<Runnable> backlog = new LinkedBlockingQueue<>(poolSize);
        CustomizableThreadFactory namedThreads = new CustomizableThreadFactory(threadNamePrefix);
        ThreadPoolExecutor.CallerRunsPolicy runInCaller = new ThreadPoolExecutor.CallerRunsPolicy();
        return new ThreadPoolExecutor(poolSize, poolSize, keepAliveSeconds, TimeUnit.SECONDS,
                backlog, namedThreads, runInCaller);
    }

    /**
     * Creates a {@link BatterSpider} and attaches the given collaborators in a fixed
     * order: scheduler, downloader, pipeline, then executor and thread count.
     *
     * @param pageProcessor page processor the spider is created with
     * @param urlScheduler  URL scheduler
     * @param pageLoader    page downloader
     * @param resultSink    result pipeline
     * @param executor      thread pool the crawler runs on
     * @param threadCount   thread count handed to the spider together with the executor
     * @return the configured {@link Spider}
     */
    private static Spider assembleSpider(PageProcessor pageProcessor, QueueScheduler urlScheduler,
                                         Downloader pageLoader, Pipeline resultSink,
                                         ExecutorService executor, int threadCount) {
        return BatterSpider.create(pageProcessor)
                .setScheduler(urlScheduler)
                .setDownloader(pageLoader)
                .addPipeline(resultSink)
                .thread(executor, threadCount);
    }

}
